---
title: "Sample Characteristics Compared to National Demographics"
subtitle: "For Appendix A"
date: ""
output: pdf_document
fontsize: 11pt
header-includes:
  \usepackage[T1]{fontenc}
  \usepackage[utf8]{inputenc}
  \usepackage{newpxtext,newpxmath}
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: do not echo source code and do not
# print messages or warnings in the rendered output.
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)

library(tidyverse)
library(rstatix)
library(magrittr)
library(scales)

# Swap the elements at positions `i` and `j` of `x` and return the modified
# copy; every other element is left where it was.
swtch <- function(x, i, j) {
  swapped <- x[c(j, i)]
  x[c(i, j)] <- swapped
  x
}

# Summarise one survey export as a character vector of table cells.
#
# Reads the CSV at `x`, keeps rows with `waiver == 1`, `citizen == 1` and
# `age >= 18`, optionally drops fast respondents, and computes the share of
# respondents in each gender, age, race, Hispanic and income category.
#
# Arguments:
#   x            Path to the survey CSV (passed to data.table::fread).
#   speeders     Single TRUE/FALSE. TRUE (default) keeps every respondent;
#                FALSE drops rows whose `Duration..in.seconds.` is below
#                `min_duration`.
#   min_duration Minimum completion time in seconds used when
#                `speeders = FALSE`. Defaults to 5 minutes (300 s).
#                NOTE(review): confirm this matches the cutoff reported in
#                the write-up.
#
# Returns a character vector: the respondent count first, followed by the
# gender, age, race, Hispanic and income shares (in that order) formatted as
# percentages with one decimal and a LaTeX-escaped "\%" suffix.
#
# NOTE(review): table() only lists categories that occur in the data, so the
# number and position of the gender/race cells shifts if a category is absent
# from the filtered sample. Callers that index the result positionally should
# verify that every category is present.
analyze <- function(x, speeders = TRUE, min_duration = 60 * 5) {
  stopifnot(is.logical(speeders), length(speeders) == 1L, !is.na(speeders))

  # data.frame() sanitises column names, which is what turns the duration
  # header into `Duration..in.seconds.`.
  df <- data.frame(data.table::fread(x))

  # Age is derived from `yob` with a fixed offset of 11.
  # NOTE(review): presumably `yob` is a coded year-of-birth index rather than
  # a calendar year -- confirm against the survey codebook.
  df$age <- df$yob + (2022 - 2010 - 1)

  df <- df %>% filter(waiver == 1, citizen == 1, age >= 18)

  # The flag is tested outside filter() so that a column named `speeders` or
  # `min_duration` in the data can never mask the function arguments.
  if (!speeders) {
    df <- df %>% filter(`Duration..in.seconds.` >= min_duration)
  }

  n_respondents <- nrow(df)

  # Gender
  gender_table <- table(df$gender) %>% prop.table()

  # Age: left-closed bins [18,25), [25,35), ..., [65,Inf)
  breaks <- c(18, 25, 35, 45, 55, 65, Inf)
  age_table <- cut(df$age, breaks = breaks, right = FALSE) %>%
    table() %>%
    prop.table()

  # Race
  race_table <- table(df$racial) %>% prop.table()

  # Hispanic: share of the first category of `hispanic`.
  # NOTE(review): assumes the first (lowest) code means "yes" -- confirm.
  his_prop <- (table(df$hispanic) %>% prop.table())[[1]]

  # Income: codes of 50 or above are left out of the distribution
  # (presumably a non-response code -- TODO confirm); the rest are binned
  # into six left-closed groups.
  breaks_income <- c(1, 3, 5, 8, 13, 15, Inf)
  income_table <- cut(df$income[df$income < 50],
                      breaks = breaks_income, right = FALSE) %>%
    table() %>%
    prop.table()

  shares <- c(gender_table, age_table, race_table, his_prop, income_table) %>%
    percent(accuracy = 0.1, suffix = "\\%")

  c(as.character(n_respondents), shares)
}
```

```{r, include = F}
# Benchmark column of the comparison table, one cell per table row, in the
# same row order that gen_table() lays out (n, sex, age, ethnicity, Hispanic,
# income). The name suggests 2019 American Community Survey figures --
# NOTE(review): confirm the source.
acs2019 <- c(
  # n (left blank for the benchmark)
  "&nbsp;",
  # Sex: male, female
  c("48.7%", "51.3%"),
  # Age: six bins from 18--24 up to 65+
  c("11.9%", "17.9%", "16.4%", "16.0%", "16.6%", "21.2%"),
  # Ethnicity: seven categories
  c("64.1%", "12.0%", "1.1%", "6.1%", "0.2%", "7.7%", "8.8%"),
  # Hispanic: yes
  "16.8%",
  # Income: six bins from <15k up to >= 150k
  c("9.4%", "8.7%", "19.7%", "28.6%", "15.3%", "18.2%")
)

# Build the lines of a Markdown pipe table comparing demographic columns.
#
# Arguments:
#   x      List of columns. Each column is a vector of cell values in the
#          fixed row order n (1), Sex (2), Age (6), Ethnicity (7),
#          Hispanic (1), Income (6) -- 23 cells in total. Extra trailing
#          cells are ignored.
#   names  Column headers; the first length(x) entries are used.
#
# Returns a character vector with one table line per element: the header
# row, the alignment row, then the data rows, with a line consisting only of
# pipes after every section except the last.
#
# Raises an error when `names` has fewer entries than `x` has columns, or
# when a column has fewer cells than the table has rows.
gen_table <- function(x, names) {
  # Row layout: `group` is printed on the first row of a section only,
  # `labels` gives one row per category.
  sections <- list(
    list(group = "n", labels = ""),
    list(group = "Sex", labels = c("Male", "Female")),
    list(group = "Age",
         labels = c("18--24", "25--34", "35--44", "45--54", "55--64", "65+")),
    list(group = "Ethnicity",
         labels = c("White", "Black", "Native American", "Asian",
                    "Pacific Islander", "Some other race",
                    "Two or more races")),
    list(group = "Hispanic", labels = "Yes"),
    list(group = "Income",
         labels = c("<15k", "15k--24k", "25k--49k", "50k--99k",
                    "100k--149k", "$\\geq$ 150k"))
  )
  n_rows <- sum(lengths(lapply(sections, function(s) s$labels)))

  if (length(names) < length(x)) {
    stop("`names` must supply a header for every column in `x`.",
         call. = FALSE)
  }
  if (any(lengths(x) < n_rows)) {
    stop("Every column in `x` must have at least ", n_rows, " cells.",
         call. = FALSE)
  }

  # Format one table row: "| cell | cell | ... |".
  md_row <- function(cells) {
    paste0("| ", paste0(cells, " |", collapse = " "))
  }

  col_headers <- unlist(lapply(seq_along(x), function(i) names[[i]]))
  header <- md_row(c("&nbsp;", "&nbsp;", col_headers))
  # Two left-aligned label columns followed by right-aligned data columns.
  alignment <- paste0("| -------| ----------------------|",
                      strrep("-----:|", length(x)))
  # Cell-less row used as vertical spacing between sections.
  spacer <- strrep("|", 3 + length(x))

  section_lines <- vector("list", length(sections))
  offset <- 0L  # number of cells already consumed from each column
  for (s in seq_along(sections)) {
    labels <- sections[[s]]$labels
    heads <- c(sections[[s]]$group, rep("", length(labels) - 1L))
    rows <- vapply(seq_along(labels), function(i) {
      values <- unlist(lapply(x, function(column) column[[offset + i]]))
      md_row(c(heads[[i]], labels[[i]], values))
    }, character(1))
    # No spacer after the final section.
    section_lines[[s]] <- if (s < length(sections)) c(rows, spacer) else rows
    offset <- offset + length(labels)
  }

  c(header, alignment, unlist(section_lines))
}
```

```{r, results = 'asis'}
# Location of the survey export.
# NOTE(review): this is an absolute path under the author's home directory;
# the document will not knit on another machine without adjusting it.
latest <- "~/Desktop/ideology-measure/replication/survey_data.csv"

# Summaries of the full sample (speeders kept) and of the sample with
# speeders removed. Despite its name, `sample_speeders_4` is the summary that
# EXCLUDES speeders.
sample <- analyze(latest)
sample_speeders_4 <- analyze(latest, speeders = FALSE)

# The first element of each summary is the respondent count; both sizes are
# used by the inline note that follows this chunk.
s_size <- as.numeric(sample[[1]])
s_sp_size <- as.numeric(sample_speeders_4[[1]])

# Colour every cell of the sample column blue in the LaTeX output, then print
# the benchmark-vs-sample table as raw Markdown.
sample_speeders_4 <- paste0("\\color{blue}{", sample_speeders_4, "}")
output <- gen_table(list(acs2019, sample_speeders_4), c("Benchmark", "Sample"))
cat(output, sep = "\n")
```

Note: A total of `r s_size` responses were recorded, `r s_size - s_sp_size` of which were deemed "speeders" (completion time under 5 minutes) and excluded from the sample.